library(tidyverse)
library(ggthemes)


# Read the two CSV inputs used throughout the script.
city.data <- read.csv(file = 'Data/city_data_minimal.csv')
ois.min <- read.csv(file = 'Data/OIS_Dataset_Minimal.csv')

# The .rda file provides an object named da38651.0001; keep it under a
# shorter name and remove the original binding from the workspace.
load(file = "Data/lemas/2020/38651-0001-Data.rda")
lemas.2020 <- da38651.0001
rm(list = 'da38651.0001')


# Split Range.Provided.By.City on '-' into first.year / last.year.
# Rows whose range contains no '-' (including NA) keep NA in both columns.
# The pieces are stored as character, exactly as strsplit() returns them;
# wrap them in as.numeric() wherever arithmetic is needed.
range.text <- as.character(city.data$Range.Provided.By.City)
has.range <- grepl('-', range.text, fixed = TRUE)
range.parts <- strsplit(range.text[has.range], '-', fixed = TRUE)

# rep() over nrow() keeps this valid for a zero-row data frame.
city.data$first.year <- rep(NA_character_, nrow(city.data))
city.data$last.year <- rep(NA_character_, nrow(city.data))

# p[2] is NA when nothing follows the dash.
city.data$first.year[has.range] <- vapply(range.parts, function(p) p[1], character(1))
city.data$last.year[has.range] <- vapply(range.parts, function(p) p[2], character(1))

rm(range.text, has.range, range.parts)


# Build the join key from AGENCYID. Going through as.character() converts the
# printed value rather than the internal code if the column is a factor;
# anything that is not an integer becomes NA.
lemas.2020$LEAR_ID <- as.integer(as.character(lemas.2020$AGENCYID))

# dplyr joins treat NA keys as equal to each other by default, so every row
# with a missing LEAR_ID on the left would be paired with every row with a
# missing LEAR_ID on the right. na_matches = 'never' rules that out.
city.with.lemas <- left_join(city.data, lemas.2020, by = 'LEAR_ID',
                             na_matches = 'never')

## These joins were flagged as creating duplicate rows. NA-to-NA matches are
## now excluded; any remaining duplication would have to come from a LEAR_ID
## that appears more than once in lemas.2020 or city.data.
## TODO confirm, e.g. with anyDuplicated(lemas.2020$LEAR_ID).
ois.with.lemas <- left_join(ois.min, lemas.2020, by = 'LEAR_ID',
                            na_matches = 'never')
ois.city <- left_join(ois.min, city.data, by = 'LEAR_ID',
                      na_matches = 'never')

# tech.staff = row-wise total of the three support-staff columns, ignoring
# individual NAs, but NA when all three are missing (rowSums(na.rm = TRUE)
# would otherwise report 0 for those rows).
# NOTE(review): the mask previously tested TECH_NON / TECH_SWN / TECH_LIM
# rather than the columns being summed; if those columns are absent the mask
# silently did nothing. The same columns are now used for both steps --
# confirm that SUP_OTH_* is the intended set.
tech.cols <- c('SUP_OTH_LIM', 'SUP_OTH_SWN', 'SUP_OTH_NON')
city.with.lemas$tech.staff <- rowSums(city.with.lemas[, tech.cols], na.rm = TRUE)
city.with.lemas$tech.staff[rowSums(!is.na(city.with.lemas[, tech.cols])) == 0] <- NA



####################################
####################################
##
##    Figure 3.1
##
####################################
####################################

# Figure 3.1: whether a department provided records (0/1) against
# log(tech.staff + 1), with a smoother over the points. Rows with
# Request.Status 'County Sheriff' are excluded.
pdf('FinalFigures/fig3-1.pdf',5,4)
# print() is required: a ggplot at top level is only drawn by auto-printing,
# which does not happen under source(), leaving an empty PDF.
print(
  city.with.lemas %>%
    mutate(responded = ifelse(Request.Status=='Received',1,0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    ggplot(aes(x = log(tech.staff + 1), y = responded)) +
    geom_smooth(color = 'black') +
    geom_point() +
    # Breaks are placed on the log scale but labelled with the raw values.
    scale_x_continuous(breaks = c(log(1), log(10), log(30), log(100), log(300), log(1000)),
                       labels = c("1", "10", "30", "100", "300", "1000")) +
    # Scale limits are deliberately much wider than [0, 1] so the scale drops
    # nothing; coord_cartesian() then zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01)) +
    theme_tufte() +
    theme(axis.line = element_line(color = 'black'),
          plot.caption = element_text(hjust = 0)) +
    labs(title = 'Support Staff and Sharing Police Shooting Records',
         x = 'Total Technical Support Staff',
         y = 'Provided Any Records')
)
dev.off()


########## end for replication purposes







# Quick look at how many departments fall in each Request.Status value.
# geom_bar() counts rows per category directly; geom_histogram(stat = 'count')
# gives the same bars but warns about ignored binning parameters.
print(
  city.data %>%
    ggplot() +
    geom_bar(aes(x = Request.Status))
)

# Named palette of five hex colours keyed A-E.
group.colors <- c(
  A = "#333BFF",
  B = "#CC6600",
  C = "#9633FF",
  D = "#E2FF33",
  E = "#E3DB71"
)

# Stacked bar chart of departments by request status. The three statuses
# 'Received', 'Non-responsive-Third Party' and 'Rejected-Third Party' are
# collapsed into a single 'Have Data' bar on the x axis, while the fill still
# distinguishes the underlying status. 'County Sheriff' rows are excluded.
# NOTE(review): bbc_style() is not provided by any package attached at the
# top of this script -- confirm where it is loaded.
pdf('Figures/FOIA/requestStatusColor.pdf',10,8)
# print() is required for the plot to reach the PDF under source().
print(
  city.data %>%
    filter(Request.Status != 'County Sheriff') %>%
    # as.character() keeps the labels if Request.Status was read as a factor;
    # ifelse() would otherwise return the integer level codes.
    mutate(have.data = ifelse(Request.Status %in%
                                c('Received', 'Non-responsive-Third Party',
                                  'Rejected-Third Party'),
                              'Have Data',
                              as.character(Request.Status))) %>%
    dplyr::select(have.data, Request.Status) %>%
    mutate(Request.Status = factor(Request.Status,
                                   levels = c('Rejected-Third Party',
                                              'Non-responsive-Third Party',
                                              'Received',
                                              'Not Received',
                                              'Rejected'))) %>%
    ggplot(aes(x = have.data, fill = Request.Status)) +
    geom_bar(stat = "count", position = "stack") +
    # Colours are named so each status keeps its colour even when a level has
    # no rows (unused levels are dropped from the scale, which would shift
    # unnamed values onto the wrong status).
    scale_fill_manual(values = c('Rejected-Third Party' = "blue",
                                 'Non-responsive-Third Party' = "green",
                                 'Received' = "black",
                                 'Not Received' = 'grey',
                                 'Rejected' = 'red')) +
    bbc_style() +
    theme(legend.position = 'none') +
    scale_x_discrete(guide = guide_axis(angle = 25)) +
    labs(title = 'Request Status',
         subtitle = 'Count of departments by current open records\nrequest status')
)
dev.off()

# Maps of the sampled cities: one with every city in a single colour, one
# coloured by request status.
#
# NOTE(review): `coords` was used here without being created anywhere earlier
# in this script. It is now built from city.data in the same way the
# "Years of Data" section builds its own `coords` (drop rows with missing
# lat, project lon/lat with usmap_transform), keeping Request.Status for the
# status map. Confirm this matches the object that was originally intended.
coords <- city.data %>%
  filter(!is.na(lat)) %>%
  dplyr::select(lat, lon, Request.Status) %>%
  usmap_transform(input_names = c('lon', 'lat'))

pdf('Figures/FOIA/citiesSampled.pdf',5,5)
# print() is required for the plot to reach the PDF under source().
print(
  plot_usmap() +
    geom_point(data = coords, aes(x = x, y = y), size = 1.5,
               color = "black", alpha = 0.4) +
    theme(legend.position = "none")
)
dev.off()

# One subset per status group; each is drawn as its own layer below.
received <- coords %>% filter(Request.Status == 'Received')
rejected <- coords %>% filter(Request.Status == 'Rejected')
third.party <- coords %>% filter(Request.Status %in% c('Non-responsive-Third Party','Rejected-Third Party'))
waiting <- coords %>% filter(Request.Status == 'Not Received')

pdf('Figures/FOIA/citiesSampledWithStatus.pdf',5,5)
# Layers are drawn in this order, so 'received' points sit on top.
print(
  plot_usmap() +
    geom_point(data = waiting, aes(x = x, y = y), size = 1.5, alpha = 0.8, colour = 'grey') +
    geom_point(data = rejected, aes(x = x, y = y), size = 1.5, alpha = 0.8, colour = 'red') +
    geom_point(data = third.party, aes(x = x, y = y), size = 1.5, alpha = 0.8, colour = 'blue') +
    geom_point(data = received, aes(x = x, y = y), size = 1.5, alpha = 0.8, colour = 'green') +
    theme(legend.position = "none")
)
dev.off()

rm('coords')


####################################
####################################
##
##    Time to Data
##
####################################
####################################

# Histogram of the number of days between DATE.OF.FIRST.REQUEST and
# DATE.OF.FINAL.RECEIPT (both parsed as month-day-year), excluding rows with
# Request.Status 'County Sheriff'.
pdf('Figures/FOIA/TimeToDataHistogram.pdf',6,5)
# print() is required for the plot to reach the PDF under source().
print(
  city.data %>%
    filter(Request.Status != 'County Sheriff') %>%
    # Both dates are referenced as columns of the piped data (not via
    # city.data$...) so the computation stays aligned with the rows in the
    # pipeline. mdy() is namespace-qualified because lubridate is not attached
    # by every tidyverse version.
    mutate(timeToData = as.numeric(difftime(lubridate::mdy(DATE.OF.FINAL.RECEIPT),
                                            lubridate::mdy(DATE.OF.FIRST.REQUEST),
                                            units = 'days'))) %>%
    ggplot(aes(x = timeToData)) +
    geom_histogram() +
    theme_tufte() +
    theme(axis.line = element_line(color = 'black')) +
    labs(title = 'Time to Provide Police Shooting Records',
         subtitle = 'Number of days from initial request to receipt of any records from department',
         x = 'Days', y = 'Total Departments') +
    scale_y_continuous(expand = c(0, 0)) +
    scale_x_continuous(expand = c(0, 0))
)
dev.off()



####################################
####################################
##
##    Response Rate by State
##
####################################
####################################

# Share of departments per state whose Request.Status is 'Received',
# excluding 'County Sheriff' rows.
responses <- city.data %>%
  filter(Request.Status != 'County Sheriff') %>%
  mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
  group_by(STATE) %>%
  summarise(Responded = mean(responded)) %>%
  as.data.frame()

# Lookup of the 50 state abbreviations and lower-case names (base R's
# state.abb / state.name). Joining from this table keeps one row per state,
# with NA for states that have no departments in city.data.
abb.walk <- data.frame(state.abb = state.abb,
                       state = tolower(state.name),
                       stringsAsFactors = FALSE)
response.data <- left_join(abb.walk, responses, by = c('state.abb' = 'STATE'))

pdf('Figures/FOIA/responseRateMap.pdf',5,5)
# print() is required for the plot to reach the PDF under source().
print(
  plot_usmap(data = response.data, values = 'Responded') +
    scale_fill_continuous(name = "Proportion of Departments",
                          labels = scales::comma,
                          limits = c(0, 1)) +
    labs(title = 'Response Rate Among Police Departments') +
    theme(legend.position = "bottom")
)
dev.off()

# The unused MainStates <- map_data("state") was dropped, so it is no longer
# listed here.
rm('responses','response.data','abb.walk')



####################################
####################################
##
##    Years of Data
##
####################################
####################################

# Map of cities that supplied a year range, with point size showing the
# number of years covered (last.year - first.year + 1).
coords <- city.data %>%
  filter(!is.na(first.year)) %>%
  # first.year / last.year are stored as text, hence as.numeric(). Rows with
  # a missing first.year were removed above, so no fallback value is needed.
  mutate(totalYears = as.numeric(last.year) - as.numeric(first.year) + 1) %>%
  dplyr::select(lat, lon, totalYears) %>%
  filter(!is.na(lat)) %>%
  usmap_transform(input_names = c('lon', 'lat'))

pdf('Figures/FOIA/yearsOfDataMap.pdf',5,5)
# print() is required for the plot to reach the PDF under source().
print(
  plot_usmap() +
    geom_point(data = coords, aes(x = x, y = y, size = totalYears),
               color = "blue", alpha = 0.25) +
    labs(size = "Total Years") +
    theme(legend.position = "bottom") +
    # Bin edges every five years; the third break was 0 (out of order)
    # where 10 belongs.
    scale_size_binned(breaks = c(1, 5, 10, 15, 20))
)
dev.off()
rm('coords')



####################################
####################################
##
##    Responsiveness v City Council Characteristics
##
####################################
####################################

# Whether a department provided records (0/1) against femcoun from the 2019
# rows of fog.data, with a smoother over the points. 'County Sheriff' rows
# are excluded.
# NOTE(review): fog.data and bbc_style() are not created or attached anywhere
# in this script -- confirm where they come from.
pdf('Figures/FOIA/responsivenessVFemaleCouncilPercent.pdf',9,8)
# print() is required for the plot to reach the PDF under source().
print(
  city.data %>%
    # Explicit key: ORI7 is the only identifier kept from fog.data, and naming
    # it avoids the natural-join message.
    left_join(fog.data %>% filter(year == 2019) %>%
                dplyr::select(ORI7, femcoun),
              by = 'ORI7') %>%
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    ggplot(aes(x = femcoun, y = responded)) +
    geom_smooth() +
    geom_point() +
    bbc_style() +
    labs(title = 'Female Leadership and Responsiveness',
         subtitle = 'Probability of responding to open records request\nand proportion of city council that is women',
         caption = 'Note: \'Yes\' indicates any data was provided at all.') +
    theme(plot.caption = element_text(hjust = 0)) +
    # Scale limits are much wider than [0, 1] so the scale drops nothing;
    # coord_cartesian() then zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01))
)
dev.off()


# Whether a department provided records (0/1) against bllcoun from the 2019
# rows of fog.data, with a smoother over the points. 'County Sheriff' rows
# are excluded.
# NOTE(review): fog.data and bbc_style() are not created or attached anywhere
# in this script -- confirm where they come from.
pdf('Figures/FOIA/responsivenessVBlackLatinoCouncilPercent.pdf',9,8)
# print() is required for the plot to reach the PDF under source().
print(
  city.data %>%
    # Explicit key: ORI7 is the only identifier kept from fog.data, and naming
    # it avoids the natural-join message.
    left_join(fog.data %>% filter(year == 2019) %>%
                dplyr::select(ORI7, bllcoun),
              by = 'ORI7') %>%
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    ggplot(aes(x = bllcoun, y = responded)) +
    geom_smooth() +
    geom_point() +
    bbc_style() +
    labs(title = 'Minority Leadership and Responsiveness',
         subtitle = 'Probability of responding to open records request\nand proportion of city council that is Black or Latino',
         caption = 'Note: \'Yes\' indicates any data was provided at all.') +
    theme(plot.caption = element_text(hjust = 0)) +
    # Scale limits are much wider than [0, 1] so the scale drops nothing;
    # coord_cartesian() then zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01))
)
dev.off()



####################################
####################################
##
##    Responsiveness v Scorecard
##
####################################
####################################

# Whether a department provided records (0/1) against its overall score from
# `scorecard` (joined on ORI9 = ori), with a smoother over the points.
# 'County Sheriff' rows are excluded.
# NOTE(review): scorecard and bbc_style() are not created or attached
# anywhere in this script -- confirm where they come from.
pdf('Figures/FOIA/responsivenessVScorecard.pdf',9,9)
# print() is required for the plot to reach the PDF under source().
print(
  city.data %>%
    left_join(scorecard, by = c('ORI9' = 'ori')) %>%
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    # calc_overall_score carries a '%' sign; strip it before converting.
    mutate(score = as.numeric(sub("%", "", calc_overall_score))) %>%
    ggplot(aes(x = score, y = responded)) +
    geom_smooth() +
    geom_point() +
    bbc_style() +
    labs(title = 'Police Scorecard and Responsiveness',
         subtitle = 'Probability of responding to open records request\nrating on Police Scorecard',
         caption = 'Note: \'Yes\' indicates any data was provided at all.') +
    theme(plot.caption = element_text(hjust = 0)) +
    # A preceding ylim(0, 1) was removed: scale_y_continuous() replaced it
    # anyway. Scale limits stay wide so the scale drops nothing;
    # coord_cartesian() zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01))
)
dev.off()


####################################
####################################
##
##    Responsiveness v Department Size
##
####################################
####################################

# Whether a department provided records (0/1) against log(size), where size
# is the row-wise total of FTNON_2019, FTLIM_2019 and FTSWORN_2019 (missing
# components ignored). Rows with size 0 and 'County Sheriff' rows are
# excluded.
# NOTE(review): bbc_style() is not provided by any package attached at the
# top of this script -- confirm where it is loaded.
pdf('Figures/FOIA/respondedAgainstDepartmentSize.pdf',10,7)
# print() is required for the plot to reach the PDF under source().
print(
  city.with.lemas %>%
    # Vectorized row totals; replaces rowwise() + sum(), which evaluated one
    # row at a time and left the data row-grouped for the rest of the chain.
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0),
           size = rowSums(cbind(FTNON_2019, FTLIM_2019, FTSWORN_2019),
                          na.rm = TRUE)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    filter(size > 0) %>%
    ggplot(aes(x = log(size), y = responded)) +
    geom_smooth() +
    geom_point() +
    # Breaks are placed on the log scale but labelled with the raw values.
    scale_x_continuous(breaks = c(log(150), log(500), log(2500), log(10000), log(50000)),
                       labels = c("150", "500", "2500", "10000", "50000")) +
    # A preceding ylim(0, 1) was removed: scale_y_continuous() replaced it
    # anyway. Scale limits stay wide so the scale drops nothing;
    # coord_cartesian() zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01)) +
    bbc_style() +
    labs(title = 'Department Size and Sharing OIS Records',
         subtitle = 'Probability of complying with open records request\nagainst total number of full-time employees',
         caption = 'Each point indicates whether the city provided any data in response to our request') +
    theme(plot.caption = element_text(hjust = 0))
)
dev.off()

# Whether a department provided records (0/1) against log(tech.staff + 1),
# with a smoother over the points. 'County Sheriff' rows are excluded.
# NOTE(review): bbc_style() is not provided by any package attached at the
# top of this script -- confirm where it is loaded.
pdf('Figures/FOIA/respondedAgainstTechStaff.pdf',10,7)
# print() is required for the plot to reach the PDF under source().
print(
  city.with.lemas %>%
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    ggplot(aes(x = log(tech.staff + 1), y = responded)) +
    geom_smooth() +
    geom_point() +
    # Breaks are placed on the log scale but labelled with the raw values.
    scale_x_continuous(breaks = c(log(1), log(10), log(30), log(100), log(300), log(1000)),
                       labels = c("1", "10", "30", "100", "300", "1000")) +
    # A preceding ylim(0, 1) was removed: scale_y_continuous() replaced it
    # anyway. Scale limits stay wide so the scale drops nothing;
    # coord_cartesian() zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01)) +
    bbc_style() +
    labs(title = 'Support Staff and Sharing OIS Records',
         subtitle = 'Probability of complying with open records request\nagainst number of technical support staff employed',
         caption = 'Each point indicates whether the city provided any data in response to our request',
         x = 'Technical Support Staff',
         y = 'Provided Any Data') +
    theme(plot.caption = element_text(hjust = 0))
)
dev.off()



# Whether a department provided records (0/1) against
# OPBUDGET / PRIMARYPOP2020, with a smoother over the points.
# 'County Sheriff' rows are excluded.
# NOTE(review): bbc_style() is not provided by any package attached at the
# top of this script -- confirm where it is loaded.
pdf('Figures/FOIA/respondedAgainstBudgetPerCapita.pdf',10,7)
# print() is required for the plot to reach the PDF under source().
print(
  city.with.lemas %>%
    mutate(responded = ifelse(Request.Status == 'Received', 1, 0)) %>%
    filter(Request.Status != 'County Sheriff') %>%
    ggplot(aes(x = OPBUDGET / PRIMARYPOP2020, y = responded)) +
    geom_smooth() +
    geom_point() +
    # A preceding ylim(0, 1) was removed: scale_y_continuous() replaced it
    # anyway. Scale limits stay wide so the scale drops nothing;
    # coord_cartesian() zooms the view to the 0/1 band.
    scale_y_continuous(expand = c(0, 0), limits = c(-50, 100),
                       breaks = c(0, 1), labels = c('No', 'Yes')) +
    coord_cartesian(ylim = c(-.01, 1.01)) +
    bbc_style() +
    labs(title = 'Operating Budgets and Sharing OIS Records',
         subtitle = 'Probability of complying with open records request\nagainst department\'s per-capita FY 2020 operating budget',
         caption = 'Each point indicates whether the city provided any data in response to our request',
         x = 'Operating Budget Per Capita ($)',
         y = 'Provided Any Data') +
    theme(plot.caption = element_text(hjust = 0))
)
dev.off()
